package de.uni_saarland.coli.doclists.texttools;

//import java.util.StringTokenizer;

import java.io.Serializable;
import java.util.StringTokenizer;

import de.uni_saarland.coli.doclists.document.ITextDocument;
import de.uni_saarland.coli.doclists.index.IDictionary;

/**
 * Tokenizer that splits a text on a fixed set of delimiter characters and
 * hands every token to an {@link IDictionary}, returning the integer the
 * dictionary reports for it.
 *
 * <p>Usage: call {@link #startDocument(ITextDocument)} or
 * {@link #startText(String)} once per text, then loop on {@link #hasNext()}
 * and {@link #next()}.
 *
 * <p>Serialization: the running tokenizer state is transient, because
 * {@link StringTokenizer} is not serializable. A deserialized instance
 * therefore has no active text and must be restarted with
 * {@code startDocument}/{@code startText} before tokens can be read.
 * NOTE(review): serializing an instance also requires the concrete
 * dictionary to be serializable - confirm against the IDictionary
 * implementations in use.
 */
public class SimpleTextTokenizer implements ITextTokenizer, Serializable {
	/** Serialization version identifier. */
	private static final long serialVersionUID = 7429565366568810227L;

	/** Characters that separate tokens; they are never returned as tokens. */
	private static final String DELIMITERS = " ,.-?!\n\t";

	/**
	 * Tokenizer over the text currently being processed, or {@code null}
	 * when no text has been started. Transient: {@link StringTokenizer}
	 * does not implement {@link Serializable}, so keeping it in the
	 * serialized form would make writing this object fail.
	 */
	transient StringTokenizer tok;

	/** Dictionary that supplies the integer ID for each token. */
	IDictionary dict;

	/**
	 * Creates a tokenizer with no active text.
	 *
	 * @param dict dictionary used by {@link #next()} to obtain token IDs
	 */
	public SimpleTextTokenizer(IDictionary dict) {
		this.tok = null;
		this.dict = dict;
	}

	/**
	 * Starts tokenizing the text of the given document.
	 *
	 * @param doc document whose {@code getText()} result is tokenized
	 */
	public void startDocument(ITextDocument doc) {
		this.startText(doc.getText());
	}

	/**
	 * Starts tokenizing the given text, discarding any text in progress.
	 *
	 * @param text text to split on space, comma, period, hyphen, question
	 *             mark, exclamation mark, newline and tab; {@code null} is
	 *             treated as a text without tokens
	 */
	public void startText(String text) {
		if (text == null) {
			// StringTokenizer rejects null input; leaving tok unset makes
			// hasNext() report false, exactly as for an empty text.
			this.tok = null;
			return;
		}
		this.tok = new StringTokenizer(text, DELIMITERS);
	}

	/**
	 * @return {@code true} if a text has been started and it still has
	 *         tokens left, {@code false} otherwise
	 */
	@Override
	public boolean hasNext() {
		if (this.tok == null) {
			return false;
		}
		return this.tok.hasMoreTokens();
	}

	/**
	 * Consumes the next token of the current text and passes it to
	 * {@code dict.addID}.
	 *
	 * @return the value returned by {@code dict.addID} for the token
	 * @throws java.util.NoSuchElementException if no text has been started
	 *         or the current text has no tokens left
	 */
	@Override
	public int next() {
		if (this.tok == null) {
			// Same exception type StringTokenizer.nextToken() raises when it
			// is exhausted, so callers see one failure mode for "no token".
			throw new java.util.NoSuchElementException(
					"no text has been started; call startDocument or startText first");
		}
		return this.dict.addID(this.tok.nextToken());
	}

}
